import json
import os
import re

def _extract_image_path(value):
    """Return the image path embedded in a user message value.

    The path is taken from between the ``<|vision_start|>`` and
    ``<|vision_end|>`` markers so that text outside the markers (trailing
    newlines, extra prompt text) cannot leak into the path. When no complete
    marker pair is present, the markers are simply removed from the whole
    value. In both cases the ``'COCO Yes: '`` prefix is dropped and
    surrounding whitespace is stripped.
    """
    span = re.search(r'<\|vision_start\|>(.*?)<\|vision_end\|>', value, re.DOTALL)
    if span:
        path = span.group(1)
    else:
        # No complete marker pair: fall back to removing whatever markers exist.
        path = value.replace('<|vision_start|>', '').replace('<|vision_end|>', '')
    return path.replace('COCO Yes: ', '').strip()


def update_image_paths(input_json_path, output_json_path, updated_path):
    """Rewrite the image directory of every user message in a JSON dataset.

    The input file must contain a JSON list whose items have a
    ``"conversations"`` list of messages, each with ``"from"`` and ``"value"``
    keys. For every message whose ``"from"`` is ``"user"``, the image file
    name is extracted from the value and the value is replaced with
    ``'COCO Yes: <|vision_start|>{updated_path}/{file name}<|vision_end|>'``.
    Other messages are left untouched. The result is written to
    ``output_json_path`` as UTF-8 JSON with a 4-space indent.

    Args:
        input_json_path: Path of the JSON file to read.
        output_json_path: Path of the JSON file to write.
        updated_path: Directory that replaces the original image directory.

    Raises:
        KeyError: If an item or message lacks one of the expected keys.
    """
    # Load the original JSON file.
    with open(input_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Point every user message at the image's new location.
    for item in data:
        for message in item["conversations"]:
            if message["from"] != "user":
                continue
            path = _extract_image_path(message["value"])
            name = os.path.basename(path)
            print(f'path,name:{path,name}')
            message["value"] = f'COCO Yes: <|vision_start|>{os.path.join(updated_path, name)}<|vision_end|>'

    # Save the processed data to the new JSON file.
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

# Example usage: read data_vl.json, repoint its image paths at the COCO 2014
# caption directory, and write the result to data_vl_updated.json.
input_json_path, output_json_path = 'data_vl.json', 'data_vl_updated.json'
update_image_paths(
    input_json_path=input_json_path,
    output_json_path=output_json_path,
    updated_path='/home/bml/storage/mnt/v-45qy8woc6te4kuv9/org/hy/datasets/coco_2014_caption',
)